import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller

# Data-processing function (carried over from the earlier scripts).
def process_data(excel_path="ori_message.xlsx",
                 years=('2016', '2017', '2018', '2019', '2020', '2021')):
    """Aggregate the per-year worksheets into one monthly table.

    Each sheet named in ``years`` is read from ``excel_path``, grouped by
    (年, 月), and the monthly mean of flow and of sediment concentration is
    taken. The sediment discharge column is the product of those two
    monthly means.

    Args:
        excel_path: Path of the workbook to read. Defaults to the file name
            the script has always used.
        years: Iterable of worksheet names, one per year. Entries are
            converted with ``str`` so integers are treated as sheet names
            rather than positional sheet indexes.

    Returns:
        A DataFrame sorted by year and month with the columns
        ``年``, ``月``, ``流量(m3/s)``, ``含沙量(kg/m3)``, ``排沙量(kg/s)``
        and ``时间`` (timestamp of the first day of each month).

    Raises:
        Errors raised by pandas (missing workbook, sheet or column, or an
        empty ``years``) propagate unchanged.
    """
    flow_col = '流量(m3/s)'
    # The aggregation reads this header WITH its trailing space; it is
    # renamed to the clean form only at the very end.
    raw_sediment_col = '含沙量(kg/m3) '
    sediment_col = '含沙量(kg/m3)'
    load_col = '排沙量(kg/s)'

    monthly_frames = []
    # The context manager guarantees the workbook handle is released even if
    # parsing one of the sheets fails.
    with pd.ExcelFile(excel_path) as excel_file:
        for year in years:
            df = excel_file.parse(str(year))
            # Year/month cells are forward-filled before grouping —
            # presumably they are only filled on the first row of each
            # run in the sheet; confirm against the workbook.
            df['年'] = df['年'].ffill().astype(int)
            df['月'] = df['月'].ffill().astype(int)

            grouped = (
                df.groupby(['年', '月'])
                .agg({flow_col: 'mean', raw_sediment_col: 'mean'})
                .reset_index()
            )
            # Computed from the unrounded means; rounding happens afterwards.
            grouped[load_col] = grouped[flow_col] * grouped[raw_sediment_col]
            grouped = grouped.round({
                flow_col: 2,
                raw_sediment_col: 4,
                load_col: 2,
            })
            monthly_frames.append(grouped)

    result = pd.concat(monthly_frames, ignore_index=True)
    result = result.sort_values(by=['年', '月']).reset_index(drop=True)
    result = result.rename(columns={raw_sediment_col: sediment_col})
    # Build a first-of-month timestamp from the integer year/month columns.
    result['时间'] = pd.to_datetime(
        result['年'].astype(str) + '-' + result['月'].astype(str) + '-01'
    )
    return result

if __name__ == "__main__":
    # Index the monthly table by its timestamp column before pulling out
    # the flow series.
    processed_data = process_data().set_index('时间')
    flow_series = processed_data['流量(m3/s)']

    # First-order difference, then the difference of that (second order).
    # Each diff produces a leading NaN, which is dropped.
    diff1 = flow_series.diff(1).dropna()
    diff2 = diff1.diff(1).dropna()

    # Dickey-Fuller test on each differenced series; index 1 of the
    # returned tuple is used as the p-value.
    p_value1 = adfuller(diff1)[1]
    p_value2 = adfuller(diff2)[1]

    # One entry per figure: (series, line label, p-value, output file).
    # Figure 21 is the first-order plot, figure 22 the second-order plot.
    title_template = 'Time Series Analysis Plots\nDickey - Fuller: p={:.5f}'
    figure_specs = (
        (diff1, 'First - Order Differenced Flow', p_value1,
         '图21.一阶差分序列图.png'),
        (diff2, 'Second - Order Differenced Flow', p_value2,
         '图22.二阶差分序列图.png'),
    )

    for series, line_label, p_value, out_file in figure_specs:
        plt.figure(figsize=(12, 6))
        plt.plot(series, label=line_label, color='black')
        plt.title(title_template.format(p_value), fontsize=12)
        plt.xlabel('Year', fontsize=10)
        plt.ylabel('Differenced Flow (m³/s)', fontsize=10)
        plt.grid(linestyle='--', alpha=0.7)
        # Save before show(): the figure is written to disk first, then
        # displayed.
        plt.savefig(out_file, dpi=300, bbox_inches='tight')
        plt.show()